
"""
HTMLTextToPDF.py
A demo program showing how to convert the text extracted from HTML
content to PDF. It uses the Beautiful Soup library (v4) to
parse the HTML.
Author: Vasudev Ram - http://www.dancingbison.com
Copyright 2015 Vasudev Ram
"""

import sys
from bs4 import BeautifulSoup
from PDFWriter import PDFWriter

def usage():
    """Write the command-line usage message to standard error.

    The script name shown in the message is taken from sys.argv[0].
    """
    script_name = sys.argv[0]
    message_lines = (
        "Usage: python " + script_name + " html_file pdf_file\n",
        "which will extract only the text from html_file and\n",
        "write it to pdf_file\n",
    )
    # Emit the pieces one at a time, in order, straight to stderr.
    for text in message_lines:
        sys.stderr.write(text)

def main():
    """Convert the text of a built-in sample HTML document to a PDF file.

    The sample HTML is parsed with Beautiful Soup, its text is extracted
    with get_text(), and each line of that text is written through
    PDFWriter to "HTMLTextTo.pdf". Command-line arguments are not read.
    """

    # Create some HTML for testing conversion of its text to PDF.
    html_doc = """
    <html>
        <head>
            <title>
            Test file for HTMLTextToPDF
            </title>
        </head>
        <body>
        This is text within the body element but outside any paragraph.
        <p>
        This is a paragraph of text. Hey there, how do you do?
        The quick red fox jumped over the slow blue cow.
        </p>
        <p>
        This is another paragraph of text.
        Don't mind what it contains.
        What is mind? Not matter.
        What is matter? Never mind.
        </p>
        This is also text within the body element but not within any paragraph.
        </body>
    </html>
    """

    pw = PDFWriter("HTMLTextTo.pdf")
    pw.setFont("Courier", 10)
    pw.setHeader("Conversion of HTML text to PDF")
    pw.setFooter("Generated by xtopdf: http://slid.es/vasudevram/xtopdf")

    # Name the parser explicitly. Without it Beautiful Soup uses whichever
    # parser happens to be installed (and warns about the guess), so the
    # extracted text could differ from one machine to another.
    soup = BeautifulSoup(html_doc, "html.parser")

    # Write the extracted text to the PDF one line at a time.
    for line in soup.get_text().split("\n"):
        pw.writeLine(line)
    pw.savePage()
    pw.close()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

